home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Info-Mac 4
/
Info_Mac IV CD-ROM (Pacific HiTech Inc.)(August 1994).iso
/
Development
/
Source
/
DBL Pascal Library
/
Levenshtein ƒ
/
Levenshtein.p
< prev
next >
Wrap
Text File
|
1992-05-06
|
6KB
|
242 lines
unit Levenshtein;
{Compute the Levenshtein distance between a pair of strings.}
{Adapted from C code presented in "Finding String Distances",}
{Ray Valdés, Dr. Dobb’s Journal, April 1992, ppg. 56— 62, 107.}
{Algorithm due to V.I. Levenshtein, as presented in "Time Warps,}
{String Edits, and MacroMolecules: The Theory and Practice of}
{Sequence Comparison", Sankoff and Kruskal, eds., Addison–Wesley,}
{1983. Macintosh implementation for THINK Pascal by D.B.Lamkins.}
interface
type
Opcode = (match, insert, delete, substitute);
LevOp = record
iA, iB: Integer;
op: Opcode;
end;
LevOps = array[1..255] of LevOp;
LevOpsPtr = ^LevOps;
LevOpsHdl = ^LevOpsPtr;
{Call InitLevDist to establish the costs of the four edit operations.}
procedure InitLevDist (matchCost, insertCost, deleteCost, substituteCost: Integer);
{LevDist returns the Levenshtein distance between the given strings. When non–nil,}
{theOps is resized and filled in with the edit sequence (as defined in the article) and}
{moves is the number of edits.}
function LevDist (a, b: Str255; theOps: LevOpsHdl; var moves: Integer): Integer;
implementation
type
MatrixCell = record
distance: Integer;
op: Opcode;
end;
MatrixCellPtr = ^MatrixCell;
MatrixCellHdl = ^MatrixCellPtr;
Move = record
dRow, dCol: Integer;
end;
var
theMatrix: MatrixCellHdl;
theCost: array[Opcode] of Integer;
theMoves: array[Opcode] of Move;
procedure InitLevDist (matchCost, insertCost, deleteCost, substituteCost: Integer);
begin
theMatrix := nil;
theCost[match] := matchCost;
theCost[insert] := insertCost;
theCost[delete] := deleteCost;
theCost[substitute] := substituteCost;
with theMoves[match] do
begin
dRow := -1;
dCol := -1;
end;
with theMoves[insert] do
begin
dRow := 0;
dCol := -1;
end;
with theMoves[delete] do
begin
dRow := -1;
dCol := 0;
end;
with theMoves[substitute] do
begin
dRow := -1;
dCol := -1;
end;
end;
function LevDist (a, b: Str255; theOps: LevOpsHdl; var moves: Integer): Integer;
var
numRows, numCols: Integer;
procedure InitializeMatrix;
var
i: Integer;
p: MatrixCellPtr;
begin
with theMatrix^^ do
begin
distance := 0;
op := delete;
end;
p := theMatrix^;
for i := 1 to numCols - 1 do
begin
p := MatrixCellPtr(ORD(p) + SIZEOF(MatrixCell));
with p^ do
begin
distance := i;
op := insert;
end;
end;
p := theMatrix^;
for i := 1 to numRows - 1 do
begin
p := MatrixCellPtr(ORD(p) + SIZEOF(MatrixCell) * numCols);
with p^ do
begin
distance := i;
op := delete;
end;
end;
end;
procedure CalculateMatrix;
var
pC, pN, pW, pNW: MatrixCellPtr;
procedure AdvancePtrs;
begin
pC := MatrixCellPtr(ORD(pC) + SIZEOF(MatrixCell));
pN := MatrixCellPtr(ORD(pN) + SIZEOF(MatrixCell));
pW := MatrixCellPtr(ORD(pW) + SIZEOF(MatrixCell));
pNW := MatrixCellPtr(ORD(pNW) + SIZEOF(MatrixCell));
end;
var
row, col: Integer;
procedure CalculateCell;
begin
if pW^.distance < pN^.distance then
if pW^.distance < pNW^.distance then
begin
pC^.op := insert;
pC^.distance := pW^.distance + theCost[insert];
end
else if a[row] = b[col] then
begin
pC^.op := match;
pC^.distance := pNW^.distance + theCost[match];
end
else
begin
pC^.op := substitute;
pC^.distance := pNW^.distance + theCost[substitute];
end
else if pN^.distance < pNW^.distance then
begin
pC^.op := delete;
pC^.distance := pN^.distance + theCost[delete];
end
else if a[row] = b[col] then
begin
pC^.op := match;
pC^.distance := pNW^.distance + theCost[match];
end
else
begin
pC^.op := substitute;
pC^.distance := pNW^.distance + theCost[substitute];
end;
end;
begin
pC := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numCols + 1));
pN := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (0 + 1));
pW := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numCols + 0));
pNW := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (0 + 0));
for row := 1 to numRows - 1 do
begin
for col := 1 to numCols - 1 do
begin
CalculateCell;
AdvancePtrs;
end;
AdvancePtrs;
end;
end;
procedure BacktrackMatrix;
var
pC: MatrixCellPtr;
theDistance, index, row, col, deltaRow, deltaCol: Integer;
whichOp: Opcode;
begin
pC := MatrixCellPtr(ORD(theMatrix^) + SIZEOF(MatrixCell) * (numRows * numCols - 1));
theDistance := pC^.distance;
if theOps <> nil then
begin
SetHandleSize(Handle(theOps), (numRows + numCols) * SIZEOF(LevOp));
index := 0;
row := numRows - 1;
col := numCols - 1;
while (row > 0) | (col > 0) do
begin
whichOp := pC^.op;
if whichOp <> match then
begin
index := index + 1;
with theOps^^[index] do
begin
iA := row;
iB := col;
op := whichOp;
end;
end;
with theMoves[whichOp] do
begin
deltaRow := dRow;
deltaCol := dCol;
end;
pC := MatrixCellPtr(ORD(pC) + (deltaRow * numCols + deltaCol) * SIZEOF(MatrixCell));
row := row + deltaRow;
col := col + deltaCol;
end;
end;
SetHandleSize(Handle(theOps), index * SIZEOF(LevOp));
moves := index;
LevDist := theDistance;
end;
var
sizeNeeded: Size;
begin {LevDist}
numRows := length(a) + 1;
numCols := length(b) + 1;
if (theMatrix = nil) | (theMatrix^ = nil) then
theMatrix := MatrixCellHdl(NewHandle(0));
HNoPurge(Handle(theMatrix));
sizeNeeded := Size(SIZEOF(MatrixCell)) * numRows * numCols;
if sizeNeeded > GetHandleSize(Handle(theMatrix)) then
SetHandleSize(Handle(theMatrix), SIZEOF(MatrixCell) * numRows * numCols);
HLock(Handle(theMatrix));
InitializeMatrix;
CalculateMatrix;
BacktrackMatrix;
HUnlock(Handle(theMatrix));
HPurge(Handle(theMatrix));
end;
end.